## Projecting average incomes by age group, income group to 2050


# Preliminaries -----------------------------------------------------------

## Clear every object from the current environment, then run the garbage
## collector.
## NOTE(review): rm() does not detach packages, and no library() calls appear
## in the visible part of this script - the packages providing qread,
## read_excel, %>%, year, fill, data.table etc. are presumably attached by
## whatever runs this file. Confirm.
rm(list = ls())
gc()


# Read starting cohort data -----------------------------------------------

## Load the starting cohort file
starting_cohorts <- qread("./Input data/starting_cohorts_aiwbh_p.qs")

## Keep only the income information: one row per unique combination of
## age_grp2, total_inc_qtile and the two smoothed average-income columns
starting_inc <- distinct(
  starting_cohorts,
  age_grp2, total_inc_qtile, wages_av_smooth, other_inc_av_smooth
)


# Read AWE wage growth data ---------------------------------------------------

## Actual AWE levels from the "2006 to 2012" workbook, with the calendar-year
## average attached to every observation.
## Output columns: date, awe, year, awe_year (one row per observation, so it
## can be stacked onto the later ABS series below).
abs_awe2012 <- read_excel(
  "./Input data/Intermediate input data/ABS AWE 2006 to 2012.xlsx",
  range = "A6:B31"
) %>%
  setNames(c("date", "awe")) %>%
  ## drop the 2012 observations (presumably because 2012 is covered by the
  ## later ABS file - confirm), then tag remaining rows with their year
  filter(year(date) != 2012) %>%
  mutate(year = year(date)) %>%
  ## within-year mean, repeated on each row of that year
  group_by(year) %>%
  mutate(awe_year = mean(awe)) %>%
  ungroup()


## Actual AWE from the Nov 2020 seasonally adjusted workbook, combined with
## the pre-2012 series and reduced to one row per year with annual growth.
## Output columns: year, awe_year, awe_growth.
abs_awe <- read_excel(
  "./Input data/Intermediate input data/ABS 6302002 AWE seasonally adjusted Nov 2020.xls",
  sheet = 2,
  skip = 9
) %>%
  ## name the first column "date"; all other column names are left as read
  setNames(c("date", names(.)[-1])) %>%
  ## keep the date and series A84998735A
  ## (`Earnings; Persons; Total earnings ;`)
  select(date, awe = A84998735A) %>%
  ## average the observations falling in each calendar year
  mutate(year = year(date)) %>%
  group_by(year) %>%
  summarise(awe_year = mean(awe), .groups = "drop") %>%
  ## stack the yearly averages already computed for the pre-2012 data
  rbind(distinct(abs_awe2012, year, awe_year)) %>%
  distinct(year, awe_year) %>%
  ## growth = this row's average relative to the previous row's, minus one
  ## (the earliest year therefore has NA growth)
  arrange(year) %>%
  mutate(awe_growth = awe_year / lag(awe_year) - 1)
  



# Combined projection data to use -----------------------------------------

## Year-by-year AWE growth path for 2000-2060 plus cumulative growth factors.
##  - up to 2018: ABS actuals (years before the first available ABS growth
##    figure stay NA)
##  - 2019-2028: linear glide from the 2018 actual to 4% in nine equal steps;
##    2019 repeats the 2018 rate and 2028 lands exactly on 4%
##  - after 2028: 4% flat
## Growth factors are cumulative products of (1 + growth), rebased so that the
## factor equals 1 in the year named in the column (2006, 2018).
awe_combined <- left_join(
  data.frame(year = 2000:2060),
  abs_awe %>% filter(year <= 2018) %>% select(year, awe_growth),
  ## explicit key: no "Joining, by = ..." message, no accidental extra keys
  by = "year"
) %>%
  ## carry the last actual (2018) forward so the glide formula can use it
  fill(awe_growth) %>%
  mutate(awe_growth = case_when(
    year <= 2018 ~ awe_growth,
    year %in% 2019:2028 ~
      awe_growth - (awe_growth - 0.04) / 9 * (year - 2018 - 1),
    year > 2028 ~ 0.04
  )) %>%
  mutate(
    ## years with no growth figure contribute zero growth to the index
    awe_growth_temp = ifelse(is.na(awe_growth), 0, awe_growth),
    awe_growth_factor_2000 = cumprod(1 + awe_growth_temp),
    ## look the base year up by value rather than by row position, so the
    ## rebasing stays correct if the year grid above is ever changed
    awe_growth_factor_2006 =
      awe_growth_factor_2000 / awe_growth_factor_2000[year == 2006],
    awe_growth_factor_2018 =
      awe_growth_factor_2000 / awe_growth_factor_2000[year == 2018]
  ) %>%
  select(-awe_growth_temp) %>%
  data.table()

# ggplot(awe_combined %>% filter(year>=2018)) +
#   geom_line(aes(x = year, y = awe_growth)) +
#   scale_y_continuous(limits = c(0,0.04))


## Write the growth path to disk; this file is used for discounting
qsave(x = awe_combined, file = "./Input data/AWE_projections.qs")


# Apply wage growth data to starting cohorts ------------------------------

## Project the starting incomes forward: build one copy of starting_inc per
## year 2018-2050, scale its smoothed income columns by that year's AWE growth
## factor relative to 2018, and stack the copies into a single data.table.
income_projection <- lapply(
  2018:2050,
  function(proj_year) {
    starting_inc %>%
      ## stamp this copy with the projection year
      mutate(year = proj_year) %>%
      ## attach the year's growth factor; the key is stated explicitly so the
      ## join cannot silently pick up other shared column names and does not
      ## print a "Joining, by = ..." message for every year
      left_join(
        awe_combined %>% select(year, awe_growth_factor_2018),
        by = "year"
      ) %>%
      ## multiply every column whose name contains "smooth" by the factor
      mutate(across(contains("smooth"), ~ .x * awe_growth_factor_2018)) %>%
      select(-awe_growth_factor_2018)
  }
) %>%
  rbindlist()


## Write the projected incomes to disk
qsave(x = income_projection, file = "./Input data/income_projections_ai.qs")


